Basic Commands
- Functions perform operations:
funcname(input1, input2)
- Create a vector of numbers:
x <- c(1,3,2,5)
x
## [1] 1 3 2 5
- Open new help file window:
?funcname
- Add two sets of numbers together:
x <- c(1,6,2)
y <- c(1,4,3)
x
## [1] 1 6 2
y
## [1] 1 4 3
length(x)
## [1] 3
length(y)
## [1] 3
x + y
## [1] 2 10 5
- Look at a list of all objects:
ls()
- Delete any that we don’t want:
rm()
- Remove all objects at once:
rm(list=ls())
- Matrices:
x=matrix(data=c(1,2,3,4), nrow=2,ncol=2)
x
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
x=matrix(c(1,2,3,4),2,2)
x
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
matrix(c(1,2,3,4),2,2,byrow=TRUE)
## [,1] [,2]
## [1,] 1 2
## [2,] 3 4
sqrt(x)
## [,1] [,2]
## [1,] 1.000000 1.732051
## [2,] 1.414214 2.000000
x^2
## [,1] [,2]
## [1,] 1 9
## [2,] 4 16
rnorm() generates a vector of random normal variables; its first argument, n, is the sample size:
x=rnorm(50)
y=x+rnorm(50,mean=50,sd=.1)
cor(x,y)
## [1] 0.9950526
set.seed() allows our code to reproduce the exact same set of random numbers
- Used whenever calculations involve random quantities
mean() and var() compute the mean and variance; sqrt(var()) gives the standard deviation, the same value as sd():
set.seed(3)
y=rnorm(100)
mean(y)
## [1] 0.01103557
var(y)
## [1] 0.7328675
sqrt(var(y))
## [1] 0.8560768
sd(y)
## [1] 0.8560768
Graphics
- Primary way to plot data:
plot()
- Scatterplot of the numbers in x versus the numbers in y:
plot(x,y)
- Additional options, e.g.
xlab sets the label on the x-axis
x=rnorm(100)
y=rnorm(100)
plot(x,y)

plot(x,y,xlab="this is the x-axis",ylab="this is the y-axis",main="Plot of X vs Y")

- Save output of an R plot:
pdf(), jpeg(), etc
- Create a sequence of numbers:
seq(a,b)
x=seq(1,10)
x
## [1] 1 2 3 4 5 6 7 8 9 10
x=1:10
x
## [1] 1 2 3 4 5 6 7 8 9 10
x=seq(-pi,pi,length=50)
- Represent three-dimensional data: Contour plot
- Takes three arguments:
- A vector of the x values (first dimension)
- A vector of the y values (second dimension)
- A matrix whose elements correspond to the z value (third dimension) for each pair of (x,y) coordinates
y=x
f=outer(x,y,function(x,y)cos(y)/(1+x^2))
contour(x,y,f)
contour(x,y,f,nlevels=45,add=T)

fa=(f-t(f))/2
contour(x,y,fa,nlevels=15)

image(): Works the same as contour(), except it produces a color-coded plot whose colors depend on the z value (AKA a heatmap)
persp(): Produce a three-dimensional plot
theta and phi control the angles at which the plot is viewed
image(x,y,fa)

persp(x,y,fa)

persp(x,y,fa,theta=30)

persp(x,y,fa,theta=30,phi=20)

persp(x,y,fa,theta=30,phi=70)

persp(x,y,fa,theta=30,phi=40)

Indexing Data
A=matrix(1:16,4,4)
A
## [,1] [,2] [,3] [,4]
## [1,] 1 5 9 13
## [2,] 2 6 10 14
## [3,] 3 7 11 15
## [4,] 4 8 12 16
A[2,3]
## [1] 10
A[c(1,3),c(2,4)]
## [,1] [,2]
## [1,] 5 13
## [2,] 7 15
A[1:3,2:4]
## [,1] [,2] [,3]
## [1,] 5 9 13
## [2,] 6 10 14
## [3,] 7 11 15
A[1:2,]
## [,1] [,2] [,3] [,4]
## [1,] 1 5 9 13
## [2,] 2 6 10 14
A[,1:2]
## [,1] [,2]
## [1,] 1 5
## [2,] 2 6
## [3,] 3 7
## [4,] 4 8
A[1,]
## [1] 1 5 9 13
A[-c(1,3),]
## [,1] [,2] [,3] [,4]
## [1,] 2 6 10 14
## [2,] 4 8 12 16
A[-c(1,3),-c(1,3,4)]
## [1] 6 8
dim(A) # number of rows followed by the number of columns given a matrix
## [1] 4 4
Loading Data
read.table() imports a data set
write.table() exports data
Auto=read.table("Auto.data.txt",header=T,na.strings="?")
#fix(Auto)
Auto=na.omit(Auto)
dim(Auto)
## [1] 392 9
names(Auto)
## [1] "mpg" "cylinders" "displacement" "horsepower"
## [5] "weight" "acceleration" "year" "origin"
## [9] "name"
Additional Graphical and Numerical Summaries
plot(Auto$cylinders, Auto$mpg)

attach(Auto)
plot(cylinders, mpg)

cylinders=as.factor(cylinders)
plot(cylinders, mpg)

plot(cylinders, mpg, col="red")

plot(cylinders, mpg, col="red", varwidth=T)

plot(cylinders, mpg, col="red", varwidth=T,horizontal=T)

plot(cylinders, mpg, col="red", varwidth=T, xlab="cylinders", ylab="MPG")

hist(mpg)

hist(mpg,col=2)

hist(mpg,col=2,breaks=15)

pairs(Auto)

pairs(~ mpg + displacement + horsepower + weight+ acceleration, Auto)

plot(horsepower,mpg)
identify(horsepower,mpg,name)

## integer(0)
summary(Auto)
## mpg cylinders displacement horsepower
## Min. : 9.00 Min. :3.000 Min. : 68.0 Min. : 46.0
## 1st Qu.:17.00 1st Qu.:4.000 1st Qu.:105.0 1st Qu.: 75.0
## Median :22.75 Median :4.000 Median :151.0 Median : 93.5
## Mean :23.45 Mean :5.472 Mean :194.4 Mean :104.5
## 3rd Qu.:29.00 3rd Qu.:8.000 3rd Qu.:275.8 3rd Qu.:126.0
## Max. :46.60 Max. :8.000 Max. :455.0 Max. :230.0
##
## weight acceleration year origin
## Min. :1613 Min. : 8.00 Min. :70.00 Min. :1.000
## 1st Qu.:2225 1st Qu.:13.78 1st Qu.:73.00 1st Qu.:1.000
## Median :2804 Median :15.50 Median :76.00 Median :1.000
## Mean :2978 Mean :15.54 Mean :75.98 Mean :1.577
## 3rd Qu.:3615 3rd Qu.:17.02 3rd Qu.:79.00 3rd Qu.:2.000
## Max. :5140 Max. :24.80 Max. :82.00 Max. :3.000
##
## name
## amc matador : 5
## ford pinto : 5
## toyota corolla : 5
## amc gremlin : 4
## amc hornet : 4
## chevrolet chevette: 4
## (Other) :365
summary(mpg)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.00 17.00 22.75 23.45 29.00 46.60